DEGs Visualization

Loading the necessary packages

pacman::p_load(tidyverse, DESeq2, ggVennDiagram, UpSetR, plotly, ggrepel, scico, apeglm)

Loading the data

# The list of DEG results
res<-readRDS("./results/deseq2_regions_results_local.rds")
summary(res)
##         Length Class        Mode
## central 6      DESeqResults S4  
## south   6      DESeqResults S4

Loading the Annotations

This annotation file contains all P. cultripes transcripts by rows. As columns, we can find: * The gene IDs for P. cultripes, followed by the transcripts IDs and the peptides IDs (gene_id, transcript_id, peptide_id) * The IDs and descriptions of X. tropicalis annotated proteome resulting from both nucleotide and peptide blasting against P. cultripes transcripts (xenx_pep_id, xenx_gene_symbol, xenx_description, xenp_pep_id, xenp_gene_symbol, xenp_description).

# The annotation file
xtrop<-read.csv("./xtr109/diamondblast109.csv", stringsAsFactors = FALSE)

Extracting Significant DEGs

Extract DEGs lists in order to visualize overlapping regulated genes in the comparison of interest.

# List of DEGs

# Creating a function to extract the lists of genes repeatedly: all DEGs, up-regulated DEGs and down-regulated DEGs.

extract_degs<-function(x) {
  return(
    x %>%
      as_tibble(rownames = "gene") %>%
      filter(padj<0.05) %>%
      pull(gene)
  )
}

extract_up<-function(x) {
  return(
    x %>%
      as_tibble(rownames = "gene") %>%
      filter(padj<0.05) %>%
      filter(log2FoldChange>0) %>%
      pull(gene)
  )
}

extract_down<-function(x) {
  return(
    x %>%
      as_tibble(rownames = "gene") %>%
      filter(padj<0.05) %>%
      filter(log2FoldChange<0) %>%
      pull(gene)
  )
}

# Extracting all differentially expressed genes from all the comparisons stored in the list of DESeqResults.

sig_degs<-lapply(res, FUN=extract_degs)
str(sig_degs)
## List of 2
##  $ central: chr [1:99] "PECUL23A000127" "PECUL23A000194" "PECUL23A000652" "PECUL23A001362" ...
##  $ south  : chr [1:1040] "PECUL23A000011" "PECUL23A000059" "PECUL23A000253" "PECUL23A000300" ...
up_degs<-lapply(res, FUN=extract_up)
down_degs<-lapply(res, FUN=extract_down)

Venn Diagram

library(ggvenn)
## Loading required package: grid
sig_degs_set <- list(Central=sig_degs$central,
                  South=sig_degs$south)

venn_palette <- c("dodgerblue","darkblue")

ggvenn(sig_degs_set, columns = c("Central","South"),stroke_size = 0.3,
       fill_color = venn_palette, stroke_color="black", show_percentage=F,
       fill_alpha=0.6, set_name_color = "black", text_size=6)

Upset Plot

# Plot Upset

upset(fromList(sig_degs),
      nsets = length(sig_degs),
      keep.order = T,
      nintersects = 100,
      number.angles = 0, point.size = 3, line.size = 1,
      sets.x.label = "Number of DEGs",
      set_size.show = TRUE,
      set_size.scale_max = max(sapply(sig_degs, length))+200, 
      text.scale = c(1.2, 1.2, 1.2, 1.2, 1.5, 1.5),
      sets.bar.color = c("dodgerblue","darkblue"),
      order.by=c("degree","freq"))

Volcano plot

Showing the log fold change plotted against the -log10() transformed adjusted p-values per gene.

Central Volcano Plot

genes_to_label <- c("mcl1", "mmp9.1")

central_data <- res$central %>%
  as_tibble(rownames = "gene_id") %>%
  drop_na(padj) %>% # drop all genes with NAs
  filter(padj < 0.5) %>% # reduce the number of points that need to be plotted
  mutate(
    category = case_when(
      padj < 0.05 & log2FoldChange > 0 ~ "Upregulated",
      padj < 0.05 & log2FoldChange < 0 ~ "Downregulated",
      TRUE ~ "Non-significant"
    )
  ) %>%
  left_join(xtrop, by = c("gene_id")) # add annotations

# Calculate the number of upregulated and downregulated genes
n_upregulated <- sum(central_data$category == "Upregulated")
n_downregulated <- sum(central_data$category == "Downregulated")

# Create the plot
gg_central <- central_data %>%
  ggplot(aes(
    x = log2FoldChange, y = -log10(padj), color = category,
    text = paste0("</br>Pcu23 gene: ", gene_id,
                  "</br>X.tr peptide: ", xenp_gene_symbol,
                  "</br>X.tr description: ", xenp_description)
  )) +
  geom_point(alpha = 0.75, shape = 16) +
  scale_color_manual(values = c(
    "Non-significant" = "#808080",
    "Upregulated" = "#FF5733",
    "Downregulated" = "#0F8CBA"
  )) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#555555") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "#555555") +
  annotate("text", x = 5, y = max(-log10(central_data$padj), na.rm = TRUE) - 0.5, 
           label = paste0("Upregulated: ", n_upregulated), color = "#FF5733", hjust = 1) +
  annotate("text", x = -5, y = max(-log10(central_data$padj), na.rm = TRUE) - 0.5, 
           label = paste0("Downregulated: ", n_downregulated), color = "#0F8CBA", hjust = 0) +
  scale_x_continuous(breaks = seq(-6, 6, by = 2), limits = c(-6, 6)) +
  theme_minimal() +
  theme(
    legend.position = "none",
  ) +
  geom_text_repel(
    data = subset(central_data, xenp_gene_symbol %in% genes_to_label),
    aes(label = xenp_gene_symbol),
    size = 3,
    box.padding = unit(1.7, "lines"),
    point.padding = unit(0.1, "lines"),
    color = "black",
    segment.color = 'black'
  )

gg_central
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("central_volcanoplot.jpeg", width = 7, height = 5 , dpi = 600)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

South Volcano Plot

genes_to_label <- c("mmp11.1", "ass1", "sec14l3", "stard8", "klf9", "hsp90aa1.1")
extra_gene <- "PECUL23A051069T1"
extra_gene_2 <- "leprotl1"


south_data <- res$south %>%
  as_tibble(rownames = "gene_id") %>%
  drop_na(padj) %>% # drop all genes with NAs
  filter(padj < 0.5) %>% # reduce the number of points that need to be plotted
  mutate(
    category = case_when(
      padj < 0.05 & log2FoldChange > 0 ~ "Upregulated",
      padj < 0.05 & log2FoldChange < 0 ~ "Downregulated",
      TRUE ~ "Non-significant"
    )
  ) %>%
  left_join(xtrop, by = c("gene_id")) # add annotations

# Calculate the number of upregulated and downregulated genes
n_upregulated <- sum(south_data$category == "Upregulated")
n_downregulated <- sum(south_data$category == "Downregulated")

# Create the plot
gg_south <- south_data %>%
  ggplot(aes(
    x = log2FoldChange, y = -log10(padj), color = category,
    text = paste0("</br>Pcu23 gene: ", gene_id,
                  "</br>X.tr peptide: ", xenp_pep_id,
                  "</br>X.tr description: ", xenp_description)
  )) +
  geom_point(alpha = 0.75, shape = 16) +
  scale_color_manual(values = c(
    "Non-significant" = "#808080",
    "Upregulated" = "#FF5733",
    "Downregulated" = "#0F8CBA"
  )) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#555555") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "#555555") +
  annotate("text", x = 5, y = max(-log10(south_data$padj), na.rm = TRUE) - 0.5, 
           label = paste0("Upregulated: ", n_upregulated), color = "#FF5733", hjust = 1) +
  annotate("text", x = -5, y = max(-log10(south_data$padj), na.rm = TRUE) - 0.5, 
           label = paste0("Downregulated: ", n_downregulated), color = "#0F8CBA", hjust = 0) +
  scale_x_continuous(breaks = seq(-6, 6, by = 2), limits = c(-6, 6)) +
  theme_minimal() +
  theme(
    legend.position = "none",
  ) +
  geom_text_repel(
    data = subset(south_data, xenp_gene_symbol %in% genes_to_label),
    aes(label = xenp_gene_symbol),
    size = 3,
    box.padding = unit(3, "lines"),
    point.padding = unit(0, "lines"),
    color = "black",
    segment.color = "black",
    max.overlaps = Inf
  ) +
  geom_text_repel(
    data = subset(south_data, transcript_id %in% extra_gene),
    aes(label = xenp_gene_symbol),
    size = 3,
    box.padding = unit(2.7, "lines"),
    point.padding = unit(0, "lines"),
    color = "black",
    segment.color = "black",
    max.overlaps = Inf
  ) +
  geom_text_repel(
    data = subset(south_data, xenp_gene_symbol %in% extra_gene_2),
    aes(label = xenp_gene_symbol),
    size = 3,
    box.padding = unit(3, "lines"),
    point.padding = unit(0, "lines"),
    color = "black",
    segment.color = "black",
    max.overlaps = Inf
  )

gg_south
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("south_volcanoplot.jpeg", width = 7, height = 5 , dpi = 600)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Getting information about interesting genes

We filter in the DESeq2 results object those DEGs which are significantly DE.

pull_genes<-function(x, alpha=0.05, lfc=0, signed="both", feature="xenp_gene_symbol") {
  
  # filter by adjusted p
  x<-x %>%
    filter(padj<alpha)
  
  if(signed=="up"){
    x<-x %>%
      filter(log2FoldChange>lfc) %>%
      pull(feature)
  }
  
  if(signed=="down"){
    x<-x %>%
      filter(log2FoldChange<(lfc*-1)) %>%
      pull(feature) 
  }
  
  if(signed=="both"){
    x<-x %>%
      filter(abs(log2FoldChange)>lfc) %>%
      pull(feature) 
  }
  
  ## deal with multiple gene annotations for the same gene (different transcripts)
  
  x<-strsplit(x, ";") %>% unlist() %>% unique()
  x<-x[!is.na(x)]
  
  return(x[!is.na(x)])
}

# now extract all

degs_ids<-lapply(res, FUN=function(x) 
  x %>%
  as_tibble(rownames = "gene_id") %>%
  left_join(xtrop) %>%
    pull_genes(feature = "gene_id", alpha = 0.05, lfc=0, signed = "both")
)
## Joining with `by = join_by(gene_id)`
## Joining with `by = join_by(gene_id)`
str(degs_ids)
## List of 2
##  $ central: chr [1:99] "PECUL23A000127" "PECUL23A000194" "PECUL23A000652" "PECUL23A001362" ...
##  $ south  : chr [1:1040] "PECUL23A000011" "PECUL23A000059" "PECUL23A000253" "PECUL23A000300" ...

After that, we join the annotations to those DEGs and arrange them by their lowest adjusted p-value to get more infromation on those DEGs.

extract_info <- function(sig_genes, deseq_result, xtrop) {
  # Convert DESeq results to tibble
  deseq_tibble <- as_tibble(deseq_result, rownames = "gene_id")
  
  # Filter DESeq results for significant DEGs based on gene_id
  filtered_deseq_result <- deseq_tibble %>%
    filter(gene_id %in% sig_genes)
  
  # Perform left join with annotation data
  detailed_info <- filtered_deseq_result %>%
    left_join(xtrop, by = "gene_id")
  
  return(detailed_info)
}
sig_degs_central <- extract_info(degs_ids$central, res$central, xtrop) %>%
    arrange(padj)
sig_degs_south <- extract_info(degs_ids$south, res$south, xtrop) %>%
    arrange(padj)

intersection <- extract_info(intersect(sig_degs_set$Central, sig_degs_set$South), res$south, xtrop)

write.csv(sig_degs_central, file = "sig_degs_central.csv")
write.csv(sig_degs_south, file = "sig_degs_south.csv")

Apoptosis

extract_info_by_keywords(res$central, c("mmp11", "apoptosis"), xtrop)
## # A tibble: 31 × 15
##    gene_id    baseMean log2FoldChange  lfcSE  stat  pvalue    padj transcript_id
##    <chr>         <dbl>          <dbl>  <dbl> <dbl>   <dbl>   <dbl> <chr>        
##  1 PECUL23A0…  2303.            0.577 0.126   4.56 5.10e-6 0.00310 PECUL23A0615…
##  2 PECUL23A0…    96.6           1.14  0.380   3.00 2.72e-3 0.193   PECUL23A0095…
##  3 PECUL23A0…   212.            0.220 0.111   1.98 4.74e-2 0.624   PECUL23A0382…
##  4 PECUL23A0…   367.           -0.123 0.0664 -1.86 6.32e-2 0.681   PECUL23A0177…
##  5 PECUL23A0…     2.52          0.943 0.616   1.53 1.26e-1 0.771   PECUL23A0261…
##  6 PECUL23A0…    43.5           1.82  1.25    1.45 1.46e-1 0.790   PECUL23A0547…
##  7 PECUL23A0…  1275.           -0.158 0.120  -1.31 1.91e-1 0.821   PECUL23A0412…
##  8 PECUL23A0…    69.5           0.204 0.157   1.30 1.93e-1 0.825   PECUL23A0180…
##  9 PECUL23A0…   745.            0.250 0.218   1.15 2.50e-1 0.857   PECUL23A0234…
## 10 PECUL23A0…   206.           -0.155 0.145  -1.07 2.85e-1 0.880   PECUL23A0303…
## # ℹ 21 more rows
## # ℹ 7 more variables: peptide_id <chr>, xenx_pep_id <chr>,
## #   xenx_gene_symbol <chr>, xenx_description <chr>, xenp_pep_id <chr>,
## #   xenp_gene_symbol <chr>, xenp_description <chr>
extract_info_by_keywords(res$south, c("mmp11", "apoptosis"), xtrop)
## # A tibble: 31 × 15
##    gene_id    baseMean log2FoldChange  lfcSE  stat  pvalue    padj transcript_id
##    <chr>         <dbl>          <dbl>  <dbl> <dbl>   <dbl>   <dbl> <chr>        
##  1 PECUL23A0…     43.5          5.03  1.19    4.22 2.49e-5 0.00232 PECUL23A0547…
##  2 PECUL23A0…    160.           0.520 0.160   3.24 1.20e-3 0.0253  PECUL23A0442…
##  3 PECUL23A0…    104.          -0.283 0.123  -2.30 2.12e-2 0.139   PECUL23A0044…
##  4 PECUL23A0…    206.          -0.315 0.146  -2.16 3.05e-2 0.174   PECUL23A0303…
##  5 PECUL23A0…     69.5          0.304 0.155   1.96 5.03e-2 0.228   PECUL23A0180…
##  6 PECUL23A0…   1273.          -0.159 0.0835 -1.90 5.75e-2 0.247   PECUL23A0303…
##  7 PECUL23A0…    202.          -0.231 0.139  -1.66 9.76e-2 0.331   PECUL23A0527…
##  8 PECUL23A0…   2303.           0.202 0.126   1.60 1.09e-1 0.352   PECUL23A0615…
##  9 PECUL23A0…     98.0         -0.256 0.186  -1.37 1.70e-1 0.439   PECUL23A0342…
## 10 PECUL23A0…    220.           0.103 0.0765  1.34 1.80e-1 0.454   PECUL23A0009…
## # ℹ 21 more rows
## # ℹ 7 more variables: peptide_id <chr>, xenx_pep_id <chr>,
## #   xenx_gene_symbol <chr>, xenx_description <chr>, xenp_pep_id <chr>,
## #   xenp_gene_symbol <chr>, xenp_description <chr>

Interactive Volcano Plots

# we can now turn this into an interactive plot:
ggplotly(gg_central, tooltip="text")
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
ggplotly(gg_south, tooltip="text")
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues